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proc UpdateReplica 

r2: Replica // New replica contents.
preconditions:

∀ (pfid, fname): r2.bptrs • pfid ∈ dom(DISK)
and IsLive(r2) ⇒ IsLive(DISK(pfid))
postconditions: 

r 2 .bptr^{} < 3 > 



FIG. 4 



• "(val_1, ..., val_n)" represents a tuple of values.

• "ℙ type" represents a (possibly empty) set of type.
"ℙ₁ type" represents a nonempty set of type.
"Key >-> Val" represents a one-to-many mapping from
type Key to Val.

• "dom(F)" returns the domain of function (or mapping) F,
and "ran(F)" returns the range of F. For instance,

dom({1 ↦ 3, 2 ↦ 8, 4 ↦ 3}) = {1, 2, 4},
ran({1 ↦ 3, 2 ↦ 8, 4 ↦ 3}) = {3, 8}.

• "X ⊕ Y" substitutes a part of mapping X by Y. E.g.,

{1 ↦ 3, 2 ↦ 1} ⊕ {1 ↦ 5, 3 ↦ 4}
= {1 ↦ 5, 2 ↦ 1, 3 ↦ 4}.

• "X ⩤ Y" means function-domain restriction: the keys in X are removed from mapping Y. E.g.,

{2} ⩤ {1 ↦ 3, 2 ↦ 8, 4 ↦ 6} = {1 ↦ 3, 4 ↦ 6}.

• "∀ var: set • expr" means that expr holds for every var in set.

E.g.,

∀ n: {11, 13, 17} • IsPrime(n).

• "◇ expr" means that expr holds eventually.

• "{var: set • expr}" means set comprehension. E.g.,

{x: {1, 2, 3} • x²} = {1, 4, 9}.

FIG. 5 
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type Replica = record 

fid <1>: FileID

peers <2>: ℙ NodeID

gpeers <3>: ℙ₁ NodeID

bptrs <4>: ℙ Backptr

deadBptr <5>: Backptr

ts <6>: Timestamp
type RegularReplica inherits Replica =

contents <7>: Data

Invariants:

¬IsLive(r) ⇒ contents = {}
type DirReplica inherits Replica =

ents <8>: (FileID, String) >-> Dentry

Invariants:

¬IsLive(r) ⇒ ents = {}
type Backptr = (FileID, String)
type Dentry = record

valid <9>: bool

ts <10>: Timestamp
gpeers <11>: ℙ₁ NodeID
proc IsLive(r)

return r is the root or r.bptrs ≠ {}
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DISK: FilelD >-> Replica 
CLOG: FileID >-> ℙ₁ NodeID
ULOG: FileID >-> ℙ Backptr
Invariants:

// Updates are only for existing replicas.
dom(CLOG) ∪ dom(ULOG) ⊆ dom(DISK) <12>



FIG. 8 



proc Create 

d: DirReplica // The local replica of the parent directory. 
fname: string // The name of the new file in d 
gpeers: ℙ₁ NodeID // The placement of the replicas of the file.
preconditions: 

IsLive(d) < 13 > 



r ← Newreplica()
r.fid ← Newfileid()
r.gpeers ← gpeers
r.ts ← Newtimestamp()
r.peers ← {}

r.bptrs ← {(d.fid, fname)}
r.contents ← None
UpdateReplica(r) 



FIG. 9 



proc Unlink 

f: Replica // The file to be unlinked. 

d: DirReplica // The directory the file belongs to. 

fname: string // f's name in d. 
preconditions: 

IsLive(d) 

f is a directory ⇒ f.ents = {}
(d.fid, fname) ∈ f.bptrs



f' ← Deepcopy(f)

f'.bptrs ← f.bptrs \ {(d.fid, fname)}
if f'.bptrs = {} then

f'.deadBptr ← (d.fid, fname)
f'.ts ← Newtimestamp()
UpdateReplica(f')



FIG. 10 
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proc Hardlink 

f: RegularReplica // The replica of the file. 

d: DirReplica // The directory to which f will be linked to. 

fname: string // The filename within d. 
preconditions: 

IsLive(d)



f' ← Deepcopy(f)

f'.bptrs ← f.bptrs ∪ {(d.fid, fname)}
f'.ts ← Newtimestamp()
UpdateReplica(f')
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proc Rename 

f: Replica // The file to be moved. 

d_F: DirReplica // The origin dir.

d_T: DirReplica // The destination dir.

fname_F: string // The filename in d_F

fname_T: string // The filename in d_T
preconditions:

IsLive(d_F) and IsLive(d_T)

(d_F.fid, fname_F) ∈ f.bptrs



f' ← Deepcopy(f)

f'.bptrs ← f.bptrs \ {(d_F.fid, fname_F)} ∪ {(d_T.fid, fname_T)}

f'.ts ← Newtimestamp()

UpdateReplica(f')
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proc Write 

f: RegularReplica 

newcontents: Data 



f' ← Deepcopy(f)

f'.contents ← newcontents

f'.ts ← Newtimestamp()

UpdateReplica(f')
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proc UpdateReplica 

r2: Replica // New replica contents.
preconditions:

// All parent directories are stored locally.

// Moreover, if r2 is live, then parent must also be live.

∀ (pfid, fname): r2.bptrs • pfid ∈ dom(DISK)
and IsLive(r2) ⇒ IsLive(DISK(pfid)) <14>



if r2.fid ∉ dom(DISK) then

// The replica isn't locally stored yet.
DISK ← DISK ∪ {r2.fid ↦ r2}
IssueCupdate(r2)
return

r1 ← DISK(r2.fid)

if File is regular then 

Do some application-specific stuff. 

We can potentially use version vectors here, 
else 

// Union dir entries, taking ones with newer timestamps on conflict.
for (key ↦ e) ∈ r2.ents
if key ∉ dom(r1.ents) or r1.ents(key).ts < e.ts

r1.ents ← r1.ents ⊕ {key ↦ e}
for each added or deleted entry (fid, fname) in r1.ents

// Entry (fid, fname) is potentially inconsistent. Fix up later.
if fid ∈ dom(DISK) then

IssueUupdate(DISK(fid), {}) <15>

if r2.ts > r1.ts then <16>

// The file's attributes are to be updated.
r1.ts ← r2.ts

if r1.gpeers ≠ r2.gpeers then
r1.gpeers ← r2.gpeers

// When the replica's gold-peer set changes, I must reflect the
// change to the parent dir entry.
IssueUupdate(r1, {})

// Resolve potential conflicts on back pointers 

if r1.bptrs ≠ r2.bptrs or r1.deadBptr ≠ r2.deadBptr then

IssueUupdate(r1, r1.bptrs \ r2.bptrs) <17>

r1.bptrs ← r2.bptrs

r1.deadBptr ← r2.deadBptr

// If the last link to the replica is gone, erase the contents.
if ¬IsLive(r1) then

if r1 is a regular file then
r1.contents ← None

else

for e ∈ r1.ents • e.valid

IssueUupdate(DISK(e.fid), {}) <18>
r1.ents ← {}

if any of r1's attributes has changed then
IssueCupdate(r1) <19>
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Protocol for adding a replica. 

Constants : 

M: Number of neighbors per replica. 

MAXHOPS: The number of hops per a random walk (the 
usual value is 3) 

# 

# AddReplica is the main procedure that adds 

# a replica of file F on the executing node. 
# 

AddReplica ( F, G) 

G: the set of gold replicas of F. 

(G is obtained by looking up the parent directory) 

g = Pick a random live node in G. 
Send to g, "CreateReplica (F, myself)" 
Wait for the contents to arrive. 
Store contents and reply the client. 

r = find the replica of F. 
Send to g, "StartRandomWalk (F, myself)" 
Wait for the set of neighbors N to arrive, 
for n in N: 

Add edge to n in r. 

Send to n, "AddEdge(F, myself)" 

SendReplicaContents (F, Sender) : 
F: the ID of the file 

Sender: the node requesting replica creation, 
r = find the replica of F 

n = pick the replica closest to Sender among 
graphneighbors of r. 

Send to n, "SendReplicaContents ( F, Sender)" 

SendReplicaContents (F, Sender) : 
F: the ID of the file 

Sender: the node requesting replica creation. 

r = find the replica of F 
Send r to Sender. 

StartRandomWalk (F, Sender) : 

F: the ID of the file 
Sender: the node requesting replica creation. 



FIG. 16A 
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r = find the replica of F 
N = { } 

for i = 0 to M-2: 

n = pick random graph neighbor in r. 

Send to n, "DoRandomWalk ( F, 0, myself)" 

Receive nodeid from n. 

Add nodeid to N. 
Send N to Sender. 

DoRandomWalk (F, hops, prevHopNode) : 
F: the ID of the file 

hops: the number of hops made so far. 

if hops = MAXHOPS 

Send myself to prevHopNode 

else 

r = find the replica of F. 

n = pick random graph neighbor in r 

Send to n, "DoRandomWalk (F, hops + 1, myself)" 

Receive nodeid from n. 

Send nodeid to prevHopNode 

AddEdge(F, peer): 

F: the ID of the file 
peer: the node to span edge to 
r = find the replica of F 
Add edge to peer in r 



FIG. 16B 
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proc IssueCupdate 

r: Replica // The replica of the file being updated. 



CLOG(r.fid) ← r.gpeers ∪ r.peers

proc PropagateCupdate // Runs periodically in the background



for (fid ↦ targets) ∈ CLOG
r ← DISK(fid) // See (12).

// Send the location of the parent dirs so that the target can
// replicate them to ensure name-space containment.
pDirs ← {p: r.bptrs • (p.fid, DISK(p.fid).gpeers)} // See (14).
for n ∈ targets

send (CUPDATE, r, pDirs) to n.
when receive (CUPDATE-REPLY, ts) from node n
if CLOG(fid).ts = ts

CLOG(fid) ← CLOG(fid) \ {n}

Remove fid from CLOG when CLOG(fid) becomes empty 



when receive (CUPDATE, r, pDirs) 
r: Replica // New replica contents. 

pDirs: ℙ (FileID, ℙ NodeID) // Name and location of parent dirs.



for (pfid, ppeers) ∈ pDirs
CreateReplica(pfid, ppeers)
ResurrectDirectory(pfid)

UpdateReplica(r) 

send ( CUPDATE-REPLY, r.ts) 
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proc IssueUupdate 

r: Replica // The replica of the file 

del: ℙ (FileID, String) // Backpointers deleted from the replica.



if r.fid ∈ dom(ULOG) then
del ← del ∪ ULOG(r.fid)
ULOG ← ULOG ⊕ {r.fid ↦ del}

proc ProcessUupdate // Called periodically in the background. 



for (fid ↦ del) ∈ ULOG
r ← DISK(fid) // See (12).
for (pfid, fname) ∈ del ∪ r.bptrs

ResurrectDirectory(pfid)

d ← DISK(pfid)

valid ← pfid ∈ r.bptrs // Is this entry to be added?
new = {(fid, fname) ↦ Dentry(valid, r.ts, r.gpeers)}
d.ents ← ({(fid, fname)} ⩤ d.ents) ∪ new
if d.ents has changed

d.ts ← Newtimestamp()

IssueCupdate(d)
ULOG ← {}
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proc Create Replica 

fid: FilelD // The ID of the file 

peers: ℙ₁ NodeID // The known set of gold peers of the file
postconditions:

fid ∈ dom(DISK)



if fid ∈ dom(DISK) then
return 

send (SEND-CONTENTS, fid) to random node n ∈ peers
Wait until receive (CONTENTS, r, pDirs) from n
for (pfid, ppeers) ∈ pDirs

CreateReplica(pfid, ppeers)
UpdateReplica(r)

Add edges between r and random existing replicas.
when receive (SEND-CONTENTS, fid) from node n



r ← DISK(fid)

pDirs ← {p: r.bptrs • (p.fid, DISK(p.fid).gpeers)} // See (14).
send (CONTENTS, r, pDirs) to n. 



FIG. 20 
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proc ResurrectDirectory 

fid: FileID
preconditions:

fid ∈ dom(DISK)
fid is a directory
postconditions:

IsLive(DISK(fid))



r ← DISK(fid)
if IsLive(r) then
return

ResurrectDirectory(r.deadBptr.pfid)
r.bptrs ← {r.deadBptr}
r.ts ← Newtimestamp()
IssueCupdate(r)
let (pfid, fname) = r.deadBptr
d ← DISK(pfid)

d.ents((fid, fname)) ← Dentry(true, r.ts, r.gpeers) <20>

d.ts ← Newtimestamp()

IssueCupdate(d)
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// Called every third night for every replica on the node, 
proc GarbageCollection 

LiveNodes: ℙ₁ NodeID // Set of live nodes.

r: Replica // Replica to be inspected. 

EXPIRE: integer // Dead-replica expiration period, e.g., a month. 

// Remove old tombstones. Removing after EXPIRE seconds is safe 
// because we cannot receive any new update with timestamp older 
// than EXPIRE after removing r. 
if ¬IsLive(r) and r.ts < Newtimestamp() - EXPIRE then

DISK ← {r.fid} ⩤ DISK

return 

r' ← Deepcopy(r)

// Remove dead entries in the directory
if r is a directory then
for (key ↦ val): r'.ents •

if val.valid and val.ts < Newtimestamp() - EXPIRE then
r'.ents ← {key} ⩤ r'.ents
if at least one entry has been removed from r'.ents then
r'.ts ← Newtimestamp()
UpdateReplica(r')

// If some gold peers are found dead, recreate one elsewhere 
if me ∈ r.gpeers and r.gpeers ⊈ LiveNodes then

newNodes ← Pick |r.gpeers \ LiveNodes| random live nodes.

r'.gpeers ← r.gpeers \ LiveNodes ∪ newNodes

r'.ts ← Newtimestamp()

UpdateReplica(r')

// If we find graph edges to dead nodes, re-span it.
for n ∈ r.peers \ LiveNodes

Add edges between r and a random replica.

r.peers ← r.peers ∩ LiveNodes
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V f: ran(D/SK) and lsLive(/) • 
( (d.fid, fname) ∈ f.bptrs
⇒ d.fid ∈ dom(DISK) and IsLive(d) )
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V d: ran(DISK) and IsLive(d) • 
( ∀ f: ran(DISK) and IsLive(f) •
((f.fid, fname) ↦ ent) ∈ d.ents and ent.valid
and (d.fid, fname) ∉ f.bptrs
⇒ ◇ ((f.fid, fname) ↦ ent) ∉ d.ents )
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V d: ran(D/SK) and lsl_ive(cQ • 
( ((f.fid, fname) ↦ ent) ∈ d.ents and ent.valid
⇒ f: ran(DISK) and IsLive(f) )
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